Load needed packages

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.5     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.4     ✓ stringr 1.4.0
## ✓ readr   2.0.2     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(rstan)
## Loading required package: StanHeaders
## rstan (Version 2.21.3, GitRev: 2e1f913d3ca3)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## 
## Attaching package: 'rstan'
## The following object is masked from 'package:tidyr':
## 
##     extract
library(bayesplot)
## This is bayesplot version 1.8.1
## - Online documentation and vignettes at mc-stan.org/bayesplot
## - bayesplot theme set to bayesplot::theme_default()
##    * Does _not_ affect other ggplot2 plots
##    * See ?bayesplot_theme_set for details on theme setting

Exercise 1

  1. The steps for grid approximation are:
     1. Define a discrete grid of possible theta values.
     2. Evaluate the prior pdf f(theta) and the likelihood function L(theta|y) at each theta grid value.
     3. Calculate the product f(theta) * L(theta|y) at each grid value, then normalize the products so they sum to 1.
     4. Randomly sample N theta grid values, weighting each value by its normalized posterior probability.
  2. To make the approximation more accurate I would change the first step, when defining the discrete grid. Increasing the number of grid values creates approximations that better match true posteriors.

Exercise 2

# Using code Yue shared to include a picture of the hand-drawn answer.
# NOTE(review): this is an absolute, user-specific path — the document will
# only knit on this machine; prefer a path relative to the .Rmd file.
knitr::include_graphics("/Users/brendaonyango/Desktop/IMG_4318 copy.png")

Exercise 3

  1. If the chain is mixing too slowly it will have only explored a limited range of values in the first several thousand iterations. It will overestimate the plausibility of pi values in this range and underestimate the plausibility of values outside the range.

  2. The chain having a high correlation will not estimate the correct peak, if there is one, in the posterior.

  3. Chains that get stuck are overestimating in the values where they are stuck and will produce peaks in the posterior that are not there.

Exercise 6.4

  1. MCMC diagnostics are important because simulations are not perfect and diagnostics, used holistically, can identify how to improve a MCMC chain.

  2. MCMC simulations are helpful because they are an efficient and flexible alternative to grid approximations. Grid approximations for more complex problems take a long time, and they are not good for estimating posteriors when there are multiple dimensions; MCMC methods can handle both situations.

  3. Rstan combines R with the Stan “engine”; Stan is written in C++ and is good for Bayesian modeling.

  4. How to put Markov chains in applied language for social problems/questions.

Exercise 6.5

We’re given Y|pi~Bin(n, pi) and pi~Beta(3, 8) and n = 10 with Y = 2.

  1. Use grid approximation with values pi in {0, 0.25, 0.50, 0.75, 1} to approximate the posterior.
#step 1: define a discrete grid of 5 equally spaced pi values on [0, 1]
grid_data <- data.frame(pi_grid = seq(0, 1, length.out = 5))

#step 2: evaluate the Beta(3, 8) prior and the Bin(10, pi) likelihood of
#Y = 2 at each grid value (the misspelled column name "liklihood" is kept
#on purpose: later chunks and the printed output reference that spelling)
grid_data$prior <- dbeta(grid_data$pi_grid, 3, 8)
grid_data$liklihood <- dbinom(2, 10, grid_data$pi_grid)

Next is approximating the posterior using the product of the likelihood and prior and then normalizing them.

#step 3: approximate the posterior — normalize prior * likelihood so it sums to 1
grid_data$unnormalized <- grid_data$liklihood * grid_data$prior
grid_data$posterior <- grid_data$unnormalized / sum(grid_data$unnormalized)

#confirming that the normalized posterior sums to 1
grid_data |> summarize(sum(unnormalized), sum(posterior))
##   sum(unnormalized) sum(posterior)
## 1         0.8765603              1

Next we’ll examine the grid approximation posterior rounded to 2 decimal places.

#step 4: inspect the approximation, rounded to 2 decimal places
grid_data |> round(digits = 2)
##   pi_grid prior liklihood unnormalized posterior
## 1    0.00  0.00      0.00         0.00      0.00
## 2    0.25  3.00      0.28         0.85      0.96
## 3    0.50  0.70      0.04         0.03      0.04
## 4    0.75  0.01      0.00         0.00      0.00
## 5    1.00  0.00      0.00         0.00      0.00

Finally, we’ll plot this model.

# plotting the grid approximation posterior as a lollipop chart:
# a point at each posterior value with a stem down to zero
# (x/xend for the stems are inherited from the plot-level aesthetics)
ggplot(grid_data, aes(x = pi_grid, y = posterior)) +
  geom_point() +
  geom_segment(aes(xend = pi_grid, y = 0, yend = posterior))

  1. Now we’re asked to repeat the above using a grid of 201 equally spaced values between 0 and 1.
#step 1: a finer grid — 201 equally spaced values on [0, 1]
grd_data <- data.frame(p_grid = seq(0, 1, length.out = 201))

#step 2: the same Beta(3, 8) prior and Bin(10, pi) likelihood of Y = 2 as
#part (a). The misspelled column names prir/liklhood/posterir are kept so
#the printed output below and the later plot still match.
grd_data$prir <- dbeta(grd_data$p_grid, 3, 8)
grd_data$liklhood <- dbinom(2, 10, grd_data$p_grid)

#step 3: normalize prior * likelihood into the posterior approximation
grd_data$unnormalized <- grd_data$liklhood * grd_data$prir
grd_data$posterir <- grd_data$unnormalized / sum(grd_data$unnormalized)

#confirming that posterior approximation sums to 1
grd_data |> summarize(sum(unnormalized), sum(posterir))
##   sum(unnormalized) sum(posterir)
## 1          41.79567             1
#step 4: inspect the finer approximation, rounded to 2 decimal places
grd_data |> round(digits = 2)
##     p_grid prir liklhood unnormalized posterir
## 1     0.00 0.00     0.00         0.00     0.00
## 2     0.00 0.01     0.00         0.00     0.00
## 3     0.01 0.03     0.00         0.00     0.00
## 4     0.01 0.07     0.01         0.00     0.00
## 5     0.02 0.13     0.02         0.00     0.00
## 6     0.03 0.19     0.02         0.00     0.00
## 7     0.03 0.26     0.03         0.01     0.00
## 8     0.04 0.34     0.04         0.01     0.00
## 9     0.04 0.43     0.05         0.02     0.00
## 10    0.04 0.53     0.06         0.03     0.00
## 11    0.05 0.63     0.07         0.05     0.00
## 12    0.06 0.73     0.09         0.06     0.00
## 13    0.06 0.84     0.10         0.08     0.00
## 14    0.06 0.95     0.11         0.11     0.00
## 15    0.07 1.06     0.12         0.13     0.00
## 16    0.07 1.17     0.14         0.16     0.00
## 17    0.08 1.29     0.15         0.19     0.00
## 18    0.09 1.40     0.16         0.22     0.01
## 19    0.09 1.51     0.17         0.26     0.01
## 20    0.10 1.62     0.18         0.30     0.01
## 21    0.10 1.72     0.19         0.33     0.01
## 22    0.10 1.83     0.20         0.37     0.01
## 23    0.11 1.93     0.21         0.41     0.01
## 24    0.12 2.02     0.22         0.45     0.01
## 25    0.12 2.12     0.23         0.49     0.01
## 26    0.12 2.21     0.24         0.53     0.01
## 27    0.13 2.30     0.25         0.57     0.01
## 28    0.14 2.38     0.26         0.61     0.01
## 29    0.14 2.45     0.26         0.65     0.02
## 30    0.14 2.53     0.27         0.68     0.02
## 31    0.15 2.60     0.28         0.72     0.02
## 32    0.16 2.66     0.28         0.75     0.02
## 33    0.16 2.72     0.29         0.78     0.02
## 34    0.16 2.77     0.29         0.80     0.02
## 35    0.17 2.82     0.29         0.83     0.02
## 36    0.18 2.87     0.30         0.85     0.02
## 37    0.18 2.91     0.30         0.87     0.02
## 38    0.18 2.94     0.30         0.88     0.02
## 39    0.19 2.97     0.30         0.89     0.02
## 40    0.20 3.00     0.30         0.90     0.02
## 41    0.20 3.02     0.30         0.91     0.02
## 42    0.21 3.04     0.30         0.92     0.02
## 43    0.21 3.05     0.30         0.92     0.02
## 44    0.22 3.06     0.30         0.92     0.02
## 45    0.22 3.06     0.30         0.91     0.02
## 46    0.22 3.06     0.30         0.91     0.02
## 47    0.23 3.06     0.29         0.90     0.02
## 48    0.24 3.05     0.29         0.89     0.02
## 49    0.24 3.04     0.29         0.88     0.02
## 50    0.24 3.02     0.29         0.86     0.02
## 51    0.25 3.00     0.28         0.85     0.02
## 52    0.26 2.98     0.28         0.83     0.02
## 53    0.26 2.96     0.27         0.81     0.02
## 54    0.26 2.93     0.27         0.79     0.02
## 55    0.27 2.90     0.26         0.77     0.02
## 56    0.28 2.87     0.26         0.74     0.02
## 57    0.28 2.83     0.25         0.72     0.02
## 58    0.29 2.79     0.25         0.70     0.02
## 59    0.29 2.75     0.24         0.67     0.02
## 60    0.30 2.71     0.24         0.65     0.02
## 61    0.30 2.67     0.23         0.62     0.01
## 62    0.30 2.62     0.23         0.60     0.01
## 63    0.31 2.58     0.22         0.57     0.01
## 64    0.32 2.53     0.22         0.55     0.01
## 65    0.32 2.48     0.21         0.52     0.01
## 66    0.32 2.43     0.20         0.50     0.01
## 67    0.33 2.38     0.20         0.47     0.01
## 68    0.34 2.32     0.19         0.45     0.01
## 69    0.34 2.27     0.19         0.43     0.01
## 70    0.35 2.22     0.18         0.40     0.01
## 71    0.35 2.16     0.18         0.38     0.01
## 72    0.36 2.11     0.17         0.36     0.01
## 73    0.36 2.05     0.16         0.34     0.01
## 74    0.36 2.00     0.16         0.32     0.01
## 75    0.37 1.94     0.15         0.30     0.01
## 76    0.38 1.89     0.15         0.28     0.01
## 77    0.38 1.83     0.14         0.26     0.01
## 78    0.38 1.78     0.14         0.24     0.01
## 79    0.39 1.72     0.13         0.23     0.01
## 80    0.40 1.67     0.13         0.21     0.01
## 81    0.40 1.61     0.12         0.19     0.00
## 82    0.41 1.56     0.12         0.18     0.00
## 83    0.41 1.51     0.11         0.17     0.00
## 84    0.42 1.45     0.11         0.15     0.00
## 85    0.42 1.40     0.10         0.14     0.00
## 86    0.42 1.35     0.10         0.13     0.00
## 87    0.43 1.30     0.09         0.12     0.00
## 88    0.44 1.25     0.09         0.11     0.00
## 89    0.44 1.20     0.08         0.10     0.00
## 90    0.44 1.16     0.08         0.09     0.00
## 91    0.45 1.11     0.08         0.08     0.00
## 92    0.46 1.06     0.07         0.08     0.00
## 93    0.46 1.02     0.07         0.07     0.00
## 94    0.47 0.98     0.07         0.06     0.00
## 95    0.47 0.93     0.06         0.06     0.00
## 96    0.48 0.89     0.06         0.05     0.00
## 97    0.48 0.85     0.06         0.05     0.00
## 98    0.48 0.81     0.05         0.04     0.00
## 99    0.49 0.78     0.05         0.04     0.00
## 100   0.50 0.74     0.05         0.03     0.00
## 101   0.50 0.70     0.04         0.03     0.00
## 102   0.50 0.67     0.04         0.03     0.00
## 103   0.51 0.64     0.04         0.02     0.00
## 104   0.52 0.60     0.04         0.02     0.00
## 105   0.52 0.57     0.03         0.02     0.00
## 106   0.52 0.54     0.03         0.02     0.00
## 107   0.53 0.51     0.03         0.02     0.00
## 108   0.54 0.48     0.03         0.01     0.00
## 109   0.54 0.46     0.03         0.01     0.00
## 110   0.54 0.43     0.02         0.01     0.00
## 111   0.55 0.41     0.02         0.01     0.00
## 112   0.56 0.38     0.02         0.01     0.00
## 113   0.56 0.36     0.02         0.01     0.00
## 114   0.57 0.34     0.02         0.01     0.00
## 115   0.57 0.32     0.02         0.01     0.00
## 116   0.58 0.30     0.02         0.00     0.00
## 117   0.58 0.28     0.01         0.00     0.00
## 118   0.58 0.26     0.01         0.00     0.00
## 119   0.59 0.24     0.01         0.00     0.00
## 120   0.60 0.23     0.01         0.00     0.00
## 121   0.60 0.21     0.01         0.00     0.00
## 122   0.60 0.20     0.01         0.00     0.00
## 123   0.61 0.18     0.01         0.00     0.00
## 124   0.62 0.17     0.01         0.00     0.00
## 125   0.62 0.16     0.01         0.00     0.00
## 126   0.62 0.15     0.01         0.00     0.00
## 127   0.63 0.14     0.01         0.00     0.00
## 128   0.64 0.13     0.01         0.00     0.00
## 129   0.64 0.12     0.01         0.00     0.00
## 130   0.64 0.11     0.00         0.00     0.00
## 131   0.65 0.10     0.00         0.00     0.00
## 132   0.66 0.09     0.00         0.00     0.00
## 133   0.66 0.08     0.00         0.00     0.00
## 134   0.66 0.08     0.00         0.00     0.00
## 135   0.67 0.07     0.00         0.00     0.00
## 136   0.68 0.06     0.00         0.00     0.00
## 137   0.68 0.06     0.00         0.00     0.00
## 138   0.69 0.05     0.00         0.00     0.00
## 139   0.69 0.05     0.00         0.00     0.00
## 140   0.70 0.04     0.00         0.00     0.00
## 141   0.70 0.04     0.00         0.00     0.00
## 142   0.70 0.03     0.00         0.00     0.00
## 143   0.71 0.03     0.00         0.00     0.00
## 144   0.72 0.03     0.00         0.00     0.00
## 145   0.72 0.03     0.00         0.00     0.00
## 146   0.72 0.02     0.00         0.00     0.00
## 147   0.73 0.02     0.00         0.00     0.00
## 148   0.74 0.02     0.00         0.00     0.00
## 149   0.74 0.02     0.00         0.00     0.00
## 150   0.74 0.01     0.00         0.00     0.00
## 151   0.75 0.01     0.00         0.00     0.00
## 152   0.76 0.01     0.00         0.00     0.00
## 153   0.76 0.01     0.00         0.00     0.00
## 154   0.76 0.01     0.00         0.00     0.00
## 155   0.77 0.01     0.00         0.00     0.00
## 156   0.78 0.01     0.00         0.00     0.00
## 157   0.78 0.01     0.00         0.00     0.00
## 158   0.78 0.00     0.00         0.00     0.00
## 159   0.79 0.00     0.00         0.00     0.00
## 160   0.80 0.00     0.00         0.00     0.00
## 161   0.80 0.00     0.00         0.00     0.00
## 162   0.80 0.00     0.00         0.00     0.00
## 163   0.81 0.00     0.00         0.00     0.00
## 164   0.82 0.00     0.00         0.00     0.00
## 165   0.82 0.00     0.00         0.00     0.00
## 166   0.83 0.00     0.00         0.00     0.00
## 167   0.83 0.00     0.00         0.00     0.00
## 168   0.84 0.00     0.00         0.00     0.00
## 169   0.84 0.00     0.00         0.00     0.00
## 170   0.84 0.00     0.00         0.00     0.00
## 171   0.85 0.00     0.00         0.00     0.00
## 172   0.86 0.00     0.00         0.00     0.00
## 173   0.86 0.00     0.00         0.00     0.00
## 174   0.86 0.00     0.00         0.00     0.00
## 175   0.87 0.00     0.00         0.00     0.00
## 176   0.88 0.00     0.00         0.00     0.00
## 177   0.88 0.00     0.00         0.00     0.00
## 178   0.88 0.00     0.00         0.00     0.00
## 179   0.89 0.00     0.00         0.00     0.00
## 180   0.90 0.00     0.00         0.00     0.00
## 181   0.90 0.00     0.00         0.00     0.00
## 182   0.90 0.00     0.00         0.00     0.00
## 183   0.91 0.00     0.00         0.00     0.00
## 184   0.92 0.00     0.00         0.00     0.00
## 185   0.92 0.00     0.00         0.00     0.00
## 186   0.92 0.00     0.00         0.00     0.00
## 187   0.93 0.00     0.00         0.00     0.00
## 188   0.94 0.00     0.00         0.00     0.00
## 189   0.94 0.00     0.00         0.00     0.00
## 190   0.95 0.00     0.00         0.00     0.00
## 191   0.95 0.00     0.00         0.00     0.00
## 192   0.96 0.00     0.00         0.00     0.00
## 193   0.96 0.00     0.00         0.00     0.00
## 194   0.96 0.00     0.00         0.00     0.00
## 195   0.97 0.00     0.00         0.00     0.00
## 196   0.98 0.00     0.00         0.00     0.00
## 197   0.98 0.00     0.00         0.00     0.00
## 198   0.98 0.00     0.00         0.00     0.00
## 199   0.99 0.00     0.00         0.00     0.00
## 200   1.00 0.00     0.00         0.00     0.00
## 201   1.00 0.00     0.00         0.00     0.00

Next I’ll plot this approximation.

# lollipop chart of the 201-point grid approximation posterior
# (x/xend for the stems are inherited from the plot-level aesthetics)
ggplot(grd_data, aes(x = p_grid, y = posterir)) +
  geom_point() +
  geom_segment(aes(xend = p_grid, y = 0, yend = posterir))

We can see that b gives a much better estimate than a.

Exercise 6.6

We’re given a Gamma-Poisson model with Y|lambda~Pois(lambda) and lambda~Gamma(20, 5). N = 3 and (Y1, Y2, Y3) = (0, 1, 0).

  1. Use grid approximation with values lambda in {0, 1, 2, …, 8} to approximate the posterior model of lambda.
# Step 1: Define the grid lambda in {0, 1, 2, ..., 8} — that is 9 equally
# spaced values. (length = 8 would give non-integer grid points and would
# not match the exercise's stated grid.)
grid_data <- data.frame(lambda_grid = seq(from = 0, to = 8, length = 9))

# Step 2: Evaluate the Gamma(20, 5) prior & the joint Poisson likelihood
# of the observed data (Y1, Y2, Y3) = (0, 1, 0) at each lambda
grid_data <- grid_data |> 
  mutate(prior = dgamma(lambda_grid, 20, 5), #given prior
         likelihood = dpois(0, lambda_grid) * dpois(1, lambda_grid) * dpois(0, lambda_grid))

# Step 3: Approximate the posterior by normalizing prior * likelihood
grid_data <- grid_data |> 
  mutate(unnormalized = likelihood * prior,
         posterior = unnormalized / sum(unnormalized))

# Set the seed so the posterior sample below is reproducible
set.seed(1500)

# Step 4: sample 10,000 lambda values from the discretized posterior
post_sample <- sample_n(grid_data, size = 10000, 
                        weight = posterior, replace = TRUE)

# Histogram of the sample with the exact conjugate posterior overlaid:
# Gamma(20 + sum(y), 5 + n) = Gamma(21, 8). (after_stat(density) is the
# modern spelling of the superseded ..density.. notation.)
ggplot(post_sample, aes(x = lambda_grid)) + 
  geom_histogram(aes(y = after_stat(density)), color = "white") + 
  stat_function(fun = dgamma, args = list(21, 8)) + 
  lims(x = c(0, 8))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).

  1. Repeat the above using a grid of 201 with equally spaced values between 0 & 8.
# Step 1: Define a grid of 201 equally spaced lambda values between 0 and 8
grid_data   <- data.frame(lambda_grid = seq(from = 0, to = 8, length = 201)) # finer grid than part (a)

# Step 2: Evaluate the prior & likelihood at each lambda
grid_data <- grid_data |> 
  mutate(prior = dgamma(lambda_grid, 20, 5), #given Gamma(20, 5) prior
         likelihood = dpois(0, lambda_grid) * dpois(1, lambda_grid) * dpois(0, lambda_grid)) # joint likelihood of (0, 1, 0)

# Step 3: Approximate the posterior by normalizing prior * likelihood
grid_data <- grid_data |> 
  mutate(unnormalized = likelihood * prior,
         posterior = unnormalized / sum(unnormalized))

# Set the seed so the posterior sample below is reproducible
set.seed(1500)

# Step 4: sample 10,000 lambda values from the discretized posterior
post_sample <- sample_n(grid_data, size = 10000, 
                        weight = posterior, replace = TRUE)

# plotting a histogram of the posterior sample with a Gamma curve overlaid
# NOTE(review): the overlaid curve is the Gamma(20, 5) *prior*; the exact
# conjugate posterior here is Gamma(21, 8) — confirm which comparison was
# intended

ggplot(post_sample, aes(x = lambda_grid)) + 
  geom_histogram(aes(y = ..density..), color = "white") + 
  stat_function(fun = dgamma, args = list(20, 5)) + 
  lims(x = c(0, 8))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).

Exercise 6.9

  1. The drawback of grid approximation is inefficiency for estimating large-scale